############ Replication Code - "Investigator Characteristics and Respondent Behavior in Online Surveys"
############ 

#### Clear workspace
rm(list=ls())
 
#### Libraries
library(foreign)
library(sandwich)
library(lmtest)
library(ggplot2)
library(digest)
library(xtable)

# colour scheme
theme_bw1 <- function(base_size = 16, base_family = "") {  
  theme_bw(base_size = base_size, base_family = base_family) %+replace%
    theme(
      axis.text.x =       element_text(size = base_size*.9, colour = "black"),
      axis.text.y =       element_text(size = base_size, colour = "black"),
      axis.ticks =        element_line(colour = "grey75"),
      axis.title.x =      element_text(size=base_size, face="bold"),
      axis.title.y =      element_text(size = base_size,angle=90, face="bold"),
      plot.title =        element_text(face = "bold"),
      legend.position =   "none"
    )
}

#### Load dataset
researcher.data.full <- read.csv("data_investigator_characteristics.csv", stringsAsFactors=F)

##########################
#### This code was run but omitted from replication as the distributed replication file already contains ID1Number and ID2Number and omits mTurkID and IP 
#### Hash MTurk IDs
#researcher.data.full$ID1Hash <- sapply(researcher.data.full$mTurkID, function(x) digest(x, "md5"))
#researcher.data.full$ID1Hash[researcher.data.full$mTurkID == ""] <- NA ###  Missing MTurk IDs
#researcher.data.full$ID2Hash <- sapply(researcher.data.full$IP, function(x) digest(x, "md5"))

### Convert to arbitrary numeric identifiers
#researcher.data.full$ID1Number <- as.integer(as.factor(researcher.data.full$ID1Hash))
#researcher.data.full$ID2Number <- as.integer(as.factor(researcher.data.full$ID2Hash))
##########################
##########################


###Remove duplicated ID's, as described in SI Section B
### Starting time
researcher.data.full$startTime <- strptime(as.character(researcher.data.full$StartDate), format = "%m/%d/%y %I:%M %p")

### Sort by startTime
researcher.data.full <- researcher.data.full[order(researcher.data.full$startTime),]

### Is Duplicated by ID1Hash?
researcher.data.full$ID1Duplicated <- NA
researcher.data.full$ID1Duplicated[!is.na(researcher.data.full$ID1Number)] <- as.integer(duplicated(researcher.data.full$ID1Number[!is.na(researcher.data.full$ID1Number)]))
### Users with ID1Number #548 and #979 appears twice by IP, have MTurk ID only in the chronologically second one.
researcher.data.full$ID1Duplicated[researcher.data.full$ID1Number == 548|researcher.data.full$ID1Number == 979] <- 1

### For those with null ID1 hash, are they duplicated by IP
ID2Temp <- duplicated(researcher.data.full$ID2Number)
researcher.data.full$ID2Duplicated <- 0
researcher.data.full$ID2Duplicated[is.na(researcher.data.full$ID1Duplicated)] <- ID2Temp[is.na(researcher.data.full$ID1Duplicated)]

### Merge into a single "duplicated" vector
researcher.data.full$duplicated <- as.integer(researcher.data.full$ID1Duplicated)
researcher.data.full$duplicated[is.na(researcher.data.full$ID1Duplicated)] <- as.integer(researcher.data.full$ID2Duplicated[is.na(researcher.data.full$ID1Duplicated)])

### Drop duplicated observations from indviduals taking the survey multiple times.
researcher.data.full <- subset(researcher.data.full, duplicated != 1)

### How many observations are left?
nrow(researcher.data.full)

########## Get treatments
### Convert names to treatment labels
blackwomen <- c('Ebony Gaines', 'Deja Washington')
whitemen <- c('Brett Walsh', 'Connor Schroeder')
blackmen <- c('DeShawn Booker', 'Tyrone Robinson')
whitewomen <- c('Laurie Yoder', 'Molly Ryan')

# Treatments
researcher.data.full$treatment.whitewoman <- ifelse(researcher.data.full$researcherName %in% whitewomen, 1, 0)
researcher.data.full$treatment.blackwoman <- ifelse(researcher.data.full$researcherName %in% blackwomen, 1, 0)
researcher.data.full$treatment.whiteman <- ifelse(researcher.data.full$researcherName %in% whitemen, 1, 0)
researcher.data.full$treatment.blackman <- ifelse(researcher.data.full$researcherName %in% blackmen, 1, 0)

# Woman/Black treatments
researcher.data.full$treatment.woman <- ifelse(researcher.data.full$treatment.whitewoman == 1 | researcher.data.full$treatment.blackwoman == 1, 1, 0)
researcher.data.full$treatment.black <- ifelse(researcher.data.full$treatment.blackwoman == 1 | researcher.data.full$treatment.blackman == 1, 1, 0)

# Did you complete the survey and could we match you?
researcher.data.full$finishedAndMatched <- as.integer(!is.na(researcher.data.full$ID1Number)&researcher.data.full$Finished == 1)

# How many finished and matched
sum(researcher.data.full$finishedAndMatched)


############################
### More data setup 
############################

################################
#### Subset data to units that finished
researcher.data <- subset(researcher.data.full, finishedAndMatched  == 1)
#### All -99 are NAs
researcher.data[researcher.data==-99] <- NA


###################################################
### Pass attention checks?

researcher.data$ac1.pass <- ifelse(researcher.data$ac1.huffpo == 1 & researcher.data$ac1.Reuters == 1, 1, 0)
researcher.data$ac2.pass <- ifelse(researcher.data$ac2.Energy == 1 & researcher.data$ac2.GlobalTrade == 1, 1, 0)

researcher.data$ac1.pass[is.na(researcher.data$ac1.pass)] <- 0
researcher.data$ac2.pass[is.na(researcher.data$ac2.pass)] <- 0

researcher.data$ac.pass.both <- ifelse(researcher.data$ac1.pass== 1 & researcher.data$ac2.pass == 1, 1, 0)

####################################################
### Background covariates

researcher.data$gender.male <- ifelse(researcher.data$gender == 1, 1, 0)
researcher.data$gender.female <- ifelse(researcher.data$gender == 2, 1, 0)

researcher.data$democrat <- ifelse(researcher.data$party == 1, 1, 0)
researcher.data$republican <- ifelse(researcher.data$party ==2, 1, 0)
researcher.data$independent <- ifelse(researcher.data$party == 3, 1, 0)

researcher.data$voteyes <- ifelse(researcher.data$turnout == 3, 1, 0)
researcher.data$voteno <- ifelse(researcher.data$turnout == 2, 1, 0)
researcher.data$votedk <- ifelse(researcher.data$turnout == 1, 1, 0)

researcher.data$white <- ifelse(researcher.data$ethnicity.white == 1, 1, 0)
researcher.data$black <- ifelse(researcher.data$ethnicity.black == 1, 1, 0)


################################################

###### Racial_resentment Scale
# Convert to Factor
# recall that items 1/4 are reverse-coded compared to 2/3.

researcher.data$blacks.less.than.deserve[researcher.data$blacks.less.than.deserve == -99] <- NA
researcher.data$blacks.less.than.deserve <- factor(researcher.data$blacks.less.than.deserve, 
                                                   labels=c("Strongly Agree", "Agree", "Disagree", "Strongly Disagree", "Neither Agree nor Disagree"))
researcher.data$blacks.less.than.deserve <- factor(researcher.data$blacks.less.than.deserve,levels(researcher.data$blacks.less.than.deserve)[c(1,2,5,3,4)])

researcher.data$blacks.like.other.minorities[researcher.data$blacks.like.other.minorities == -99] <- NA
researcher.data$blacks.like.other.minorities <- factor(researcher.data$blacks.like.other.minorities, 
                                                       labels=c("Strongly Agree", "Agree", "Disagree", "Strongly Disagree", "Neither Agree nor Disagree"))
researcher.data$blacks.like.other.minorities <- factor(researcher.data$blacks.like.other.minorities,levels(researcher.data$blacks.like.other.minorities)[c(4,3,5,2,1)])

researcher.data$blacks.not.trying[researcher.data$blacks.not.trying == -99] <- NA
researcher.data$blacks.not.trying <- factor(researcher.data$blacks.not.trying, 
                                            labels=c("Strongly Agree", "Agree", "Disagree", "Strongly Disagree", "Neither Agree nor Disagree"))
researcher.data$blacks.not.trying <- factor(researcher.data$blacks.not.trying,levels(researcher.data$blacks.not.trying)[c(4,3,5,2,1)])

researcher.data$blacks.slavery.discrimination[researcher.data$blacks.slavery.discrimination == -99] <- NA
researcher.data$blacks.slavery.discrimination <- factor(researcher.data$blacks.slavery.discrimination, 
                                                        labels=c("Strongly Agree", "Agree", "Disagree", "Strongly Disagree", "Neither Agree nor Disagree"))

researcher.data$blacks.slavery.discrimination <- factor(researcher.data$blacks.slavery.discrimination,levels(researcher.data$blacks.slavery.discrimination)[c(1,2,5,3,4)])


researcher.data$racial.resentment <- ((as.numeric(researcher.data$blacks.less.than.deserve)-1)/4 + (as.numeric(researcher.data$blacks.like.other.minorities)-1)/4 +
                                        (as.numeric(researcher.data$blacks.not.trying)-1)/4 +  (as.numeric(researcher.data$blacks.slavery.discrimination)-1)/4)/4

###############################################
## Make Panel 1 of Figure 1 (Racial Resentment)
###############################################

### Model frame for predictions:
prediction.frame <- data.frame(treatment.black = c(1,0))

###### Estimate effect on racial resentment
researcher.data.resentment <- subset(researcher.data, !is.na(racial.resentment))
resentment <- lm(racial.resentment  ~ treatment.black, data=researcher.data.resentment)

### Bootstrap the vcov
set.seed(390)
boot.iter <- 5000
boot.estims.resentment <- matrix(ncol=length(coef(resentment)), nrow=boot.iter)
for(it in 1:boot.iter){
  ### Resample
  researcher.data.boot <- researcher.data.resentment[sample(1:nrow(researcher.data.resentment), nrow(researcher.data.resentment), replace=T),]
  researcher.boot <- lm(racial.resentment~ treatment.black , data=researcher.data.boot)
  boot.estims.resentment[it,] <- coef(researcher.boot) 
}

resentment_vcov <- cov(boot.estims.resentment)

resentment_pval <- round(coeftest(resentment, resentment_vcov)[2,4], 3)
resentment_results <- data.frame(treatments = c(paste("Black Name\nn=",sum(model.frame(resentment)$treatment.black),sep=""), 
                                                paste("White Name\nn=",sum(model.frame(resentment)$treatment.black == 0),sep="")),
                                 point = predict(resentment, newdata=prediction.frame),
                                 se = sqrt(c(resentment_vcov[1,1] + resentment_vcov[2,2] + 2*resentment_vcov[1,2],resentment_vcov[1,1])))

resentment_results$lower95 <- resentment_results$point - qnorm(.975)*resentment_results$se
resentment_results$upper95 <- resentment_results$point + qnorm(.975)*resentment_results$se


#### Plot racial resentment

resentment_diff <- data.frame(treatments = c(paste("Black Name (vs. White Name)\nn=",nrow(model.frame(resentment)),sep="")), 
           point = coef(resentment)[2],
           se = sqrt(resentment_vcov[2,2]))

resentment_diff$lower95 <- resentment_diff$point - qnorm(.975)*resentment_diff$se
resentment_diff$upper95 <- resentment_diff$point + qnorm(.975)*resentment_diff$se

#### Plot racial resentment - difference
pdf("figures/Figure1_Panel1.pdf", width=4, height=7.1)
p = ggplot(resentment_diff, aes(y=point, x=treatments))
p = p + geom_hline(yintercept = 0,size=1 ,colour="grey50", linetype="dashed") 
p = p + scale_y_continuous(name="Effect on Racial Resentment Scale", limits=c(-.2, .2))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)

p = p + scale_x_discrete(name=paste("p-value of difference = ",resentment_pval, sep=""))
p = p + ggtitle("Effect of researcher race\non racial resentment\n")
p = p + theme_bw1()
print(p)
dev.off()

##################################################
## Make Panel 2 of Figure 1 (Black President)
##################################################

######## Black president
researcher.data$president.black.yes <- ifelse(researcher.data$president.black == 1, 1, 0)
researcher.data$president.black.no <- ifelse(researcher.data$president.black == 2, 1, 0)


researcher.data.blackpres <- subset(researcher.data, !is.na(researcher.data$president.black.yes))
black_president <- lm(president.black.yes ~ treatment.black, data=researcher.data.blackpres)

### Bootstrap the vcov
set.seed(390)
boot.iter <- 5000
boot.estims.blackpres <- matrix(ncol=length(coef(black_president)), nrow=boot.iter)
for(it in 1:boot.iter){
  ### Resample
  researcher.data.boot <- researcher.data.blackpres[sample(1:nrow(researcher.data.blackpres), nrow(researcher.data.blackpres), replace=T),]
  researcher.boot <- lm(president.black.yes ~ treatment.black , data=researcher.data.boot)
  boot.estims.blackpres[it,] <- coef(researcher.boot) 
}

black_president_vcov <- cov(boot.estims.blackpres)

black_president_pval <- round(coeftest(black_president, black_president_vcov)[2,4], 3)
prediction.frame <- data.frame(treatment.black = c(1,0))

black_president_results <- data.frame(treatments = c(paste("Black Name\nn=",sum(model.frame(black_president)$treatment.black),sep=""), 
                                                     paste("White Name\nn=",sum(model.frame(black_president)$treatment.black == 0),sep="")),
                                      point = predict(black_president, newdata=prediction.frame),
                                      se = sqrt(c(black_president_vcov[1,1] + black_president_vcov[2,2] + 2*black_president_vcov[1,2],black_president_vcov[1,1])))

black_president_results$lower95 <- black_president_results$point - qnorm(.975)*black_president_results$se
black_president_results$upper95 <- black_president_results$point + qnorm(.975)*black_president_results$se


blackpres_diff <- data.frame(treatments = c(paste("Black Name (vs. White Name)\nn=",nrow(model.frame(black_president)),sep="")), 
                              point = coef(black_president)[2],
                              se = sqrt(black_president_vcov[2,2]))

blackpres_diff$lower95 <- blackpres_diff$point - qnorm(.975)*blackpres_diff$se
blackpres_diff$upper95 <- blackpres_diff$point + qnorm(.975)*blackpres_diff$se

#### Plot black president - difference
pdf("figures/Figure1_Panel2.pdf", width=4, height=7.1)
p = ggplot(blackpres_diff, aes(y=point, x=treatments))

p = p + geom_hline(yintercept = 0,size=1 ,colour="grey50", linetype="dashed") 
p = p + scale_y_continuous(name="Effect on share that would vote for a black president", limits=c(-.2, .2))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)

p = p + scale_x_discrete(name=paste("p-value of difference = ",black_president_pval, sep=""))
p = p + ggtitle("Effect of researcher race on\nwillingness to vote for\na black president")
p = p + theme_bw1()
print(p)
dev.off()


##############################################
## Now make Figure 2 Panel 1 (Gender Equality)
##############################################

###### Women's Role
researcher.data$womenrole[researcher.data$womenrole == -99] <- NA
researcher.data$womenrole.equal <- ifelse(researcher.data$womenrole == 1, 1, 0)
researcher.data$womenrole.home <- ifelse(researcher.data$womenrole == 2, 1, 0)
researcher.data$womenrole.dk <- ifelse(researcher.data$womenrole == 3, 1, 0)


researcher.data.womenequal <- subset(researcher.data, !is.na(researcher.data$womenrole.equal))
women_equal <- lm(womenrole.equal ~ treatment.woman, data=researcher.data.womenequal)


### Bootstrap the vcov
set.seed(390)
boot.iter <- 5000
boot.estims.womenequal <- matrix(ncol=length(coef(women_equal)), nrow=boot.iter)
for(it in 1:boot.iter){
  ### Resample
  researcher.data.boot <- researcher.data.womenequal[sample(1:nrow(researcher.data.womenequal), nrow(researcher.data.womenequal), replace=T),]
  researcher.boot <- lm(womenrole.equal ~ treatment.woman , data=researcher.data.boot)
  boot.estims.womenequal[it,] <- coef(researcher.boot) 
}

women_equal_vcov <- cov(boot.estims.womenequal)


women_equal_pval <- round(coeftest(women_equal, women_equal_vcov)[2,4], 3)
prediction.frame.woman <- data.frame(treatment.woman = c(1,0))

women_equal_results <- data.frame(treatments = c(paste("Female Name\nn=",sum(model.frame(women_equal)$treatment.woman),sep=""), 
                                                 paste("Male Name\nn=",sum(model.frame(women_equal)$treatment.woman == 0),sep="")),
                                  point = predict(women_equal, newdata=prediction.frame.woman),
                                  se = sqrt(c(women_equal_vcov[1,1] + women_equal_vcov[2,2] + 2*women_equal_vcov[1,2],women_equal_vcov[1,1])))

women_equal_results$lower95 <- women_equal_results$point - qnorm(.975)*women_equal_results$se
women_equal_results$upper95 <- women_equal_results$point + qnorm(.975)*women_equal_results$se


#### Alternate figure - difference
women_equal_diff <- data.frame(treatments = c(paste("Female Name (vs. Male Name)\nn=",nrow(model.frame(women_equal)),sep="")), 
                             point = coef(women_equal)[2],
                             se = sqrt(women_equal_vcov[2,2]))

women_equal_diff$lower95 <- women_equal_diff$point - qnorm(.95)*women_equal_diff$se
women_equal_diff$upper95 <- women_equal_diff$point + qnorm(.95)*women_equal_diff$se

### BH-adjusted confidence bounds = 1 - .05 * (1/3)% bound
women_equal_diff$lowerBH <-  women_equal_diff$point - qnorm(1 - .05 * (1/3))*women_equal_diff$se
women_equal_diff$upperBH <-  women_equal_diff$point + qnorm(1 - .05 * (1/3))*women_equal_diff$se

#### Plot women equal
pdf("figures/Figure2_Panel1.pdf", width=4, height=7.1)
p = ggplot(women_equal_diff, aes(y=point, x=treatments))

p = p + geom_hline(yintercept = 0,size=1 ,colour="grey50", linetype="dashed") 
p = p + scale_y_continuous(name="Effect on share that say women should be equal to men", limits=c(-.2, .2))
p = p + geom_pointrange(aes(ymin=lowerBH,ymax=upperBH), size= 1.5)

p = p + scale_x_discrete(name=paste("p-value of difference = ",women_equal_pval, sep=""))
p = p + ggtitle("Effect of researcher gender on\nattitudes on gender equality\n")
p = p + theme_bw1()
print(p)
dev.off()

##########################################
## Make Figure 2 Panel 2 (Woman President)
##########################################

######## Woman president
researcher.data$president.woman.yes <- ifelse(researcher.data$president.woman == 1, 1, 0)
researcher.data$president.woman.no <- ifelse(researcher.data$president.woman == 2, 1, 0)

researcher.data.womanpres <- subset(researcher.data, !is.na(researcher.data$president.woman.yes))
woman_president <- lm(president.woman.yes ~ treatment.woman, data=researcher.data.womanpres)

### Bootstrap the vcov
set.seed(390)
boot.iter <- 5000
boot.estims.womanpres <- matrix(ncol=length(coef(woman_president)), nrow=boot.iter)
for(it in 1:boot.iter){
  ### Resample
  researcher.data.boot <- researcher.data.womanpres[sample(1:nrow(researcher.data.womanpres), nrow(researcher.data.womanpres), replace=T),]
  researcher.boot <- lm(president.woman.yes ~ treatment.woman , data=researcher.data.boot)
  boot.estims.womanpres[it,] <- coef(researcher.boot) 
}

woman_president_vcov <- cov(boot.estims.womanpres)

# Marginals
woman_president_pval <- round(coeftest(woman_president, woman_president_vcov)[2,4], 3)
prediction.frame.woman <- data.frame(treatment.woman = c(1,0))

woman_president_results <- data.frame(treatments = c(paste("Female Name\nn=",sum(model.frame(woman_president)$treatment.woman),sep=""), 
                                                     paste("Male Name\nn=",sum(model.frame(woman_president)$treatment.woman == 0),sep="")),
                                      point = predict(woman_president, newdata=prediction.frame.woman),
                                      se = sqrt(c(woman_president_vcov[1,1] + woman_president_vcov[2,2] + 2*woman_president_vcov[1,2],woman_president_vcov[1,1])))

woman_president_results$lower95 <- woman_president_results$point - qnorm(.975)*woman_president_results$se
woman_president_results$upper95 <- woman_president_results$point + qnorm(.975)*woman_president_results$se


#### difference
woman_president_diff <- data.frame(treatments = c(paste("Female Name (vs. Male Name)\nn=",nrow(model.frame(woman_president)),sep="")), 
                               point = coef(woman_president)[2],
                               se = sqrt(woman_president_vcov[2,2]))

woman_president_diff$lower95 <- woman_president_diff$point - qnorm(.975)*woman_president_diff$se
woman_president_diff$upper95 <- woman_president_diff$point + qnorm(.975)*woman_president_diff$se



#### Plot woman president - difference
pdf("figures/Figure2_Panel2.pdf", width=4, height=7.1)
p = ggplot(woman_president_diff, aes(y=point, x=treatments))

p = p + geom_hline(yintercept = 0,size=1 ,colour="grey50", linetype="dashed") 
p = p + scale_y_continuous(name="Effect on share that would vote for a woman president", limits=c(-.2, .2))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)

p = p + scale_x_discrete(name=paste("p-value of difference = ",woman_president_pval, sep=""))
p = p + ggtitle("Effect of researcher gender on\nwillingness to vote for\na woman president")
p = p + theme_bw1()
print(p)
dev.off()

##############################################
## Make Figure 1 Panel 3 and Figure 2 Panel 3
##############################################

#### Service spending : both race- and gender-of-interviewer effects
researcher.data$services.more <- ifelse(researcher.data$services == 2, 1, 0)
researcher.data$services.cut <- ifelse(researcher.data$services == 1, 1, 0)

researcher.data.servicesmore <- subset(researcher.data, !is.na(researcher.data$services.more))

service_cut_black <- lm(services.more ~ treatment.black, data=researcher.data.servicesmore)


### Bootstrap the vcov
set.seed(390)
boot.iter <- 5000
boot.estims.servicesmoreblack <- matrix(ncol=length(coef(service_cut_black)), nrow=boot.iter)
for(it in 1:boot.iter){
  ### Resample
  researcher.data.boot <- researcher.data.servicesmore[sample(1:nrow(researcher.data.servicesmore), nrow(researcher.data.servicesmore), replace=T),]
  researcher.boot <- lm(services.more ~ treatment.black, data=researcher.data.boot)
  boot.estims.servicesmoreblack[it,] <- coef(researcher.boot) 
}

service_cut_black_vcov <- cov(boot.estims.servicesmoreblack)


# Marginals
service_cut_black_pval <- round(coeftest(service_cut_black, service_cut_black_vcov)[2,4], 3)
prediction.frame.black <- data.frame(treatment.black = c(1,0))

service_cut_black_results <- data.frame(treatments = c(paste("Black Name\nn=",sum(model.frame(service_cut_black)$treatment.black),sep=""), 
                                                       paste("White Name\nn=",sum(model.frame(service_cut_black)$treatment.black == 0),sep="")),
                                        point = predict(service_cut_black, newdata=prediction.frame.black),
                                        se = sqrt(c(service_cut_black_vcov[1,1] + service_cut_black_vcov[2,2] + 2*service_cut_black_vcov[1,2],service_cut_black_vcov[1,1])))

service_cut_black_results$lower95 <- service_cut_black_results$point - qnorm(.975)*service_cut_black_results$se
service_cut_black_results$upper95 <- service_cut_black_results$point + qnorm(.975)*service_cut_black_results$se


service_cut_black_diff <- data.frame(treatments = c(paste("Black Name (vs. White Name)\nn=",nrow(model.frame(service_cut_black)),sep="")), 
                                   point = coef(service_cut_black)[2],
                                   se = sqrt(service_cut_black_vcov[2,2]))

service_cut_black_diff$lower95 <- service_cut_black_diff$point - qnorm(.975)*service_cut_black_diff$se
service_cut_black_diff$upper95 <- service_cut_black_diff$point + qnorm(.975)*service_cut_black_diff$se

#### Plot service cut black - diff
pdf("figures/Figure1_Panel3.pdf", width=4, height=7.1)
p = ggplot(service_cut_black_diff, aes(y=point, x=treatments))

p = p + geom_hline(yintercept = 0,size=1 ,colour="grey50", linetype="dashed") 
p = p + scale_y_continuous(name="Effect on share supporting greater service spending", limits=c(-.2, .2))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)

p = p + scale_x_discrete(name=paste("p-value of difference = ",service_cut_black_pval, sep=""))
p = p + ggtitle("Effect of researcher race on\nwillingness to support\npublic service expansion")
p = p + theme_bw1()
print(p)
dev.off()

### Gender effect

service_cut_woman <- lm(services.more ~ treatment.woman, data=researcher.data.servicesmore)


### Bootstrap the vcov
set.seed(390)
boot.iter <- 5000
boot.estims.servicesmorewoman <- matrix(ncol=length(coef(service_cut_woman)), nrow=boot.iter)
for(it in 1:boot.iter){
  ### Resample
  researcher.data.boot <- researcher.data.servicesmore[sample(1:nrow(researcher.data.servicesmore), nrow(researcher.data.servicesmore), replace=T),]
  researcher.boot <- lm(services.more ~ treatment.woman, data=researcher.data.boot)
  boot.estims.servicesmorewoman[it,] <- coef(researcher.boot) 
}

service_cut_woman_vcov <- cov(boot.estims.servicesmorewoman)

service_cut_woman_pval <- round(coeftest(service_cut_woman, service_cut_woman_vcov)[2,4], 3)
prediction.frame.woman <- data.frame(treatment.woman = c(1,0))

service_cut_woman_results <- data.frame(treatments = c(paste("Female Name\nn=",sum(model.frame(service_cut_woman)$treatment.woman),sep=""), 
                                                       paste("Male Name\nn=",sum(model.frame(service_cut_woman)$treatment.woman == 0),sep="")),
                                        point = predict(service_cut_woman, newdata=prediction.frame.woman),
                                        se = sqrt(c(service_cut_woman_vcov[1,1] + service_cut_woman_vcov[2,2] + 2*service_cut_woman_vcov[1,2],service_cut_woman_vcov[1,1])))

service_cut_woman_results$lower95 <- service_cut_woman_results$point - qnorm(.975)*service_cut_woman_results$se
service_cut_woman_results$upper95 <- service_cut_woman_results$point + qnorm(.975)*service_cut_woman_results$se


#### Plot service cut woman

service_cut_woman_diff <- data.frame(treatments = c(paste("Female Name (vs. Male Name)\nn=",nrow(model.frame(service_cut_woman)),sep="")), 
                                     point = coef(service_cut_woman)[2],
                                     se = sqrt(service_cut_woman_vcov[2,2]))

service_cut_woman_diff$lower95 <- service_cut_woman_diff$point - qnorm(.975)*service_cut_woman_diff$se
service_cut_woman_diff$upper95 <- service_cut_woman_diff$point + qnorm(.975)*service_cut_woman_diff$se

### BH-adjusted confidence bounds = 1 - .05 * (2/3)% bound
service_cut_woman_diff$lowerBH <- service_cut_woman_diff$point - qnorm(.9833333)*service_cut_woman_diff$se
service_cut_woman_diff$upperBH <- service_cut_woman_diff$point + qnorm(.9833333)*service_cut_woman_diff$se

#### Plot service cut woman - difference
pdf("figures/Figure2_Panel3.pdf", width=4, height=7.1)
p = ggplot(service_cut_woman_diff, aes(y=point, x=treatments))
#p = p + coord_flip()  
p = p + geom_hline(yintercept = 0,size=1 ,colour="grey50", linetype="dashed") 
p = p + scale_y_continuous(name="Effect on share supporting greater service spending", limits=c(-.2, .2))
p = p + geom_pointrange(aes(ymin=lowerBH,ymax=upperBH), size= 1.5)

p = p + scale_x_discrete(name=paste("p-value of difference = ",service_cut_woman_pval, sep=""))
p = p + ggtitle("Effect of researcher gender on\nwillingness to support\npublic service expansion")
p = p + theme_bw1()
print(p)
dev.off()

############### End of code for Figures 1 and 2. ####################################

#### pull in counts of MTurk postings and generate Table 2
turk <- read.csv("MTurk_postings.csv")
head(turk)
print(xtable(turk), include.rownames=F)

############################################
### Supplementary information
############################################

##################################
#### Data summaries
##################################

### Outcome Summaries
## List of variables 
## (Table A3 in SI section A)
all.variables <- c("racial.resentment", "president.black.yes", "president.woman.yes","womenrole.equal","womenrole.home","services.more","services.cut")

outcome.summaries <- data.frame(Variable = c("Racial Resentment", "Would vote for a black president", "Would vote for a woman president", "Women's role: Equal", "Women's role: Home", "Expand public services","Cut public services"),
                                Mean=sapply(all.variables, function(x) mean(researcher.data[[x]], na.rm=T)), 
                                Std.Dev = sapply(all.variables, function(x) sd(researcher.data[[x]], na.rm=T)), 
                                N.Responses = sapply(all.variables, function(x) sum(!is.na(researcher.data[[x]]))))

print(xtable(outcome.summaries, caption = "Summary statistics for outcome variables", digits = 3), include.rownames=FALSE)

#### Covariates by treatment assignment
balance.tests <- data.frame(Variable = c("Prop. Male", "Prop. Democrat", "Prop. Republican", "Prop. Voted in 2012", "Prop. White", "Prop. Black"),
                            N.Respondents = c(sum(!is.na(researcher.data$gender.male)), sum(!is.na(researcher.data$democrat)), sum(!is.na(researcher.data$republican)),
                                              sum(!is.na(researcher.data$voteyes)), sum(!is.na(researcher.data$white)), sum(!is.na(researcher.data$black))),
                            Researcher.Male = NA, Researcher.Female = NA, t.stat.gender = NA, Researcher.White = NA, Researcher.Black = NA, t.stat.race = NA)

balance.tests[1, c(3,4)] <- t.test(gender.male ~ treatment.woman, data=researcher.data)$estimate
balance.tests[1, c(5)] <- t.test(gender.male ~ treatment.woman, data=researcher.data)$statistic

balance.tests[1, c(6,7)] <- t.test(gender.male ~ treatment.black, data=researcher.data)$estimate
balance.tests[1, c(8)] <- t.test(gender.male ~ treatment.black, data=researcher.data)$statistic

balance.tests[2, c(3,4)] <- t.test(democrat ~ treatment.woman, data=researcher.data)$estimate
balance.tests[2, c(5)] <- t.test(democrat ~ treatment.woman, data=researcher.data)$statistic

balance.tests[2, c(6,7)] <- t.test(democrat ~ treatment.black, data=researcher.data)$estimate
balance.tests[2, c(8)] <- t.test(democrat ~ treatment.black, data=researcher.data)$statistic

balance.tests[3, c(3,4)] <- t.test(republican ~ treatment.woman, data=researcher.data)$estimate
balance.tests[3, c(5)] <- t.test(republican ~ treatment.woman, data=researcher.data)$statistic

balance.tests[3, c(6,7)] <- t.test(republican ~ treatment.black, data=researcher.data)$estimate
balance.tests[3, c(8)] <- t.test(republican ~ treatment.black, data=researcher.data)$statistic

balance.tests[4, c(3,4)] <- t.test(voteyes ~ treatment.woman, data=researcher.data)$estimate
balance.tests[4, c(5)] <- t.test(voteyes ~ treatment.woman, data=researcher.data)$statistic

balance.tests[4, c(6,7)] <- t.test(voteyes ~ treatment.black, data=researcher.data)$estimate
balance.tests[4, c(8)] <- t.test(voteyes ~ treatment.black, data=researcher.data)$statistic

balance.tests[5, c(3,4)] <- t.test(white ~ treatment.woman, data=researcher.data)$estimate
balance.tests[5, c(5)] <- t.test(white ~ treatment.woman, data=researcher.data)$statistic

balance.tests[5, c(6,7)] <- t.test(white ~ treatment.black, data=researcher.data)$estimate
balance.tests[5, c(8)] <- t.test(white ~ treatment.black, data=researcher.data)$statistic

balance.tests[6, c(3,4)] <- t.test(black ~ treatment.woman, data=researcher.data)$estimate
balance.tests[6, c(5)] <- t.test(black ~ treatment.woman, data=researcher.data)$statistic

balance.tests[6, c(6,7)] <- t.test(black ~ treatment.black, data=researcher.data)$estimate
balance.tests[6, c(8)] <- t.test(black ~ treatment.black, data=researcher.data)$statistic

balance.tests.gender <- balance.tests[,c(1,2,3,4,5)]
balance.tests.race <- balance.tests[,c(1,2,6,7,8)]

print(xtable(balance.tests.gender, caption = "Covariate means by treatment conditions - Researcher gender", digits = 3), include.rownames=FALSE)
print(xtable(balance.tests.race, caption = "Covariate means by treatment conditions - Researcher race", digits = 3), include.rownames=FALSE)


############################################
### Completion rates (As in Section C of SI)
############################################

#### Fit a regression model for treatment on completion 
finish <- lm(finishedAndMatched ~ treatment.whitewoman + treatment.blackwoman + treatment.blackman + treatment.whiteman, data=researcher.data.full)
### Bootstrap the vcov
set.seed(390)
boot.iter <- 5000
boot.estims <- matrix(ncol=length(coef(finish)), nrow=boot.iter)
for(it in 1:boot.iter){
  ### Resample
  researcher.data.boot <- researcher.data.full[sample(1:nrow(researcher.data.full), nrow(researcher.data.full), replace=T),]
  finish.boot <- lm(finishedAndMatched ~ treatment.whitewoman + treatment.blackwoman + treatment.blackman + treatment.whiteman, data=researcher.data.boot)
  boot.estims[it,] <- coef(finish.boot) 
}
### Save the variance covariance matrix
finish_vcov <- cov(boot.estims)[-5,-5]

##### Visualize the results
prediction.frame.finish <- data.frame(treatment.whitewoman = c(1,0,0,0), treatment.blackwoman = c(0,1,0,0), treatment.blackman= c(0,0,1,0), treatment.whiteman = c(0,0,0,1))

finish_results <- data.frame(treatments = c(paste("White Woman\nn=",sum(model.frame(finish)$treatment.whitewoman==1),sep=""), 
                                            paste("Black Woman\nn=",sum(model.frame(finish)$treatment.blackwoman == 1),sep=""),
                                            paste("Black Man\nn=",sum(model.frame(finish)$treatment.blackman == 1),sep=""),
                                            paste("White Man\nn=",sum(model.frame(finish)$treatment.whiteman == 1),sep="")),
                             point = predict(finish, newdata=prediction.frame.finish),
                             se = sqrt(c(finish_vcov[1,1] + finish_vcov[2,2] + 2*finish_vcov[1,2],
                                         finish_vcov[1,1] + finish_vcov[3,3] + 2*finish_vcov[1,3],
                                         finish_vcov[1,1] + finish_vcov[4,4] + 2*finish_vcov[1,4],
                                         finish_vcov[1,1])))


finish_results$lower95 <- finish_results$point - qnorm(.975)*finish_results$se
finish_results$upper95 <- finish_results$point + qnorm(.975)*finish_results$se

#### Plot completion rates
pdf("figures/FigureA1.pdf", width=7, height=5)
p = ggplot(finish_results, aes(y=point, x=treatments)) ### Initialize plot
p = p + scale_y_continuous(name="% Completing Survey", limits=c(.96,1)) ### Y-axis
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 2) ### 
p = p + scale_x_discrete(name=paste("Treatment conditions", sep=""))
p = p + ggtitle("Effect of treatments on survey non-completion rates")
p = p + theme_bw1()
print(p)
dev.off()

#############################################
##### SI Section D
#############################################

##############################################
#### Do we have imbalance in the treatment conditions?
##############################################

### Convert names to treatment labels
blackwomen <- c('Ebony Gaines', 'Deja Washington')

whitemen <- c('Brett Walsh', 'Connor Schroeder')

blackmen <- c('DeShawn Booker', 'Tyrone Robinson')

whitewomen <- c('Laurie Yoder', 'Molly Ryan')

researcher.data$treatVec <- NA
researcher.data$treatVec[researcher.data$treatment.whiteman == 1] <- "White Male"
researcher.data$treatVec[researcher.data$treatment.whitewoman == 1] <- "White Woman"
researcher.data$treatVec[researcher.data$treatment.blackman == 1] <- "Black Male"
researcher.data$treatVec[researcher.data$treatment.blackwoman == 1] <- "Black Woman"

set.seed(02138)
test.stat <- max(table(researcher.data$treatVec)) - min(table(researcher.data$treatVec))

total_num <- length(researcher.data$treatVec)
iter <- 100000
null_dist <- rep(NA, iter)

sim_dist <- rmultinom(iter, total_num, prob=c(.25, .25, .25, .25))
null_dist <- apply(sim_dist, 2, max) - apply(sim_dist, 2, min)  

### p-value of hypothesis test for non-randomization. Our observed distribution is consistent with
### uniform randomization.
p.val <- mean(as.integer(null_dist > test.stat))


################ Are there differences in demographics across our treatment conditions?

### Gender
woman_select <- lm(gender.female ~ treatment.whitewoman + treatment.blackwoman + treatment.blackman + treatment.whiteman, data=researcher.data)
woman_select_vcov <- vcovHC(woman_select)
coeftest(woman_select, woman_select_vcov)

prediction.frame.exploratory <- data.frame(treatment.whitewoman = c(1,0,0,0), treatment.blackwoman = c(0,1,0,0), treatment.blackman= c(0,0,1,0), treatment.whiteman = c(0,0,0,1))

woman_select_results <- data.frame(treatments = c(paste("White Woman\nn=",sum(model.frame(woman_select)$treatment.whitewoman==1),sep=""), 
                                                  paste("Black Woman\nn=",sum(model.frame(woman_select)$treatment.blackwoman == 1),sep=""),
                                                  paste("Black Man\nn=",sum(model.frame(woman_select)$treatment.blackman == 1),sep=""),
                                                  paste("White Man\nn=",sum(model.frame(woman_select)$treatment.whiteman == 1),sep="")),
                                   point = predict(woman_select, newdata=prediction.frame.exploratory),
                                   se = sqrt(c(woman_select_vcov[1,1] + woman_select_vcov[2,2] + 2*woman_select_vcov[1,2],
                                               woman_select_vcov[1,1] + woman_select_vcov[3,3] + 2*woman_select_vcov[1,3],
                                               woman_select_vcov[1,1] + woman_select_vcov[4,4] + 2*woman_select_vcov[1,4],
                                               woman_select_vcov[1,1])))


woman_select_results$lower95 <- woman_select_results$point - qnorm(.975)*woman_select_results$se
woman_select_results$upper95 <- woman_select_results$point + qnorm(.975)*woman_select_results$se

woman_select_fstat <- 1- pf(summary(woman_select)[[10]][1], summary(woman_select)[[10]][2], summary(woman_select)[[10]][3])


#### Plot % women in-sample
pdf("figures/FigureA2.pdf", width=7, height=4)
p = ggplot(woman_select_results, aes(y=point, x=treatments))
p = p + geom_hline(yintercept = 0.5,size=1 ,colour="black", linetype="dotted") 
p = p + scale_y_continuous(name="Share of female\nrespondents in sample", limits=c(.4, .6))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)
p = p + scale_x_discrete(name=paste("Treatment conditions\nF-test p-value = ", round(woman_select_fstat,4), sep=""))
p = p + ggtitle("Effect of researcher race and gender on\nselection into survey by gender")
p = p + theme_bw1()
print(p)
dev.off()

##################
# Make Figure A3 #
##################

# Panel 2: (Race - White)

white_select <- lm(white ~ treatment.whitewoman + treatment.blackwoman + treatment.blackman + treatment.whiteman, data=researcher.data)
white_select_vcov <- vcovHC(white_select)
coeftest(white_select, white_select_vcov)

prediction.frame.exploratory <- data.frame(treatment.whitewoman = c(1,0,0,0), treatment.blackwoman = c(0,1,0,0), treatment.blackman= c(0,0,1,0), treatment.whiteman = c(0,0,0,1))

white_select_results <- data.frame(treatments = c(paste("White Woman\nn=",sum(model.frame(white_select)$treatment.whitewoman==1),sep=""),
                                                  paste("Black Woman\nn=",sum(model.frame(white_select)$treatment.blackwoman == 1),sep=""),
                                                  paste("Black Man\nn=",sum(model.frame(white_select)$treatment.blackman == 1),sep=""),
                                                  paste("White Man\nn=",sum(model.frame(white_select)$treatment.whiteman == 1),sep="")),
                                   point = predict(white_select, newdata=prediction.frame.exploratory),
                                   se = sqrt(c(white_select_vcov[1,1] + white_select_vcov[2,2] + 2*white_select_vcov[1,2],
                                               white_select_vcov[1,1] + white_select_vcov[3,3] + 2*white_select_vcov[1,3],
                                               white_select_vcov[1,1] + white_select_vcov[4,4] + 2*white_select_vcov[1,4],
                                               white_select_vcov[1,1])))


white_select_results$lower95 <- white_select_results$point - qnorm(.975)*white_select_results$se
white_select_results$upper95 <- white_select_results$point + qnorm(.975)*white_select_results$se


white_select_fstat <- 1- pf(summary(white_select)[[10]][1], summary(white_select)[[10]][2], summary(white_select)[[10]][3])

#### Plot % white in-sample
pdf("figures/FigureA3_Panel2.pdf", width=7, height=4)
p = ggplot(white_select_results, aes(y=point, x=treatments))

p = p + scale_y_continuous(name="Share of white\nrespondents in sample", limits=c(.7, .9))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)

p = p + scale_x_discrete(name=paste("Treatment conditions\nF-test p-value = ", round(white_select_fstat,4), sep=""))
p = p + ggtitle("Effect of researcher race and gender on\nselection into survey by race")
p = p + theme_bw1()
print(p)
dev.off()

# Panel 1: Race - Black

black_select <- lm(black ~ treatment.whitewoman + treatment.blackwoman + treatment.blackman + treatment.whiteman, data=researcher.data)
black_select_vcov <- vcovHC(black_select)
coeftest(black_select, black_select_vcov)

prediction.frame.exploratory <- data.frame(treatment.whitewoman = c(1,0,0,0), treatment.blackwoman = c(0,1,0,0), treatment.blackman= c(0,0,1,0), treatment.whiteman = c(0,0,0,1))

black_select_results <- data.frame(treatments = c(paste("White Woman\nn=",sum(model.frame(black_select)$treatment.whitewoman==1),sep=""), 
                                                  paste("Black Woman\nn=",sum(model.frame(black_select)$treatment.blackwoman == 1),sep=""),
                                                  paste("Black Man\nn=",sum(model.frame(black_select)$treatment.blackman == 1),sep=""),
                                                  paste("White Man\nn=",sum(model.frame(black_select)$treatment.whiteman == 1),sep="")),
                                   point = predict(black_select, newdata=prediction.frame.exploratory),
                                   se = sqrt(c(black_select_vcov[1,1] + black_select_vcov[2,2] + 2*black_select_vcov[1,2],
                                               black_select_vcov[1,1] + black_select_vcov[3,3] + 2*black_select_vcov[1,3],
                                               black_select_vcov[1,1] + black_select_vcov[4,4] + 2*black_select_vcov[1,4],
                                               black_select_vcov[1,1])))


black_select_results$lower95 <- black_select_results$point - qnorm(.975)*black_select_results$se
black_select_results$upper95 <- black_select_results$point + qnorm(.975)*black_select_results$se

black_select_fstat <- 1- pf(summary(black_select)[[10]][1], summary(black_select)[[10]][2], summary(black_select)[[10]][3])

### Plot %black
pdf("figures/FigureA3_Panel1.pdf", width=7, height=4)
p = ggplot(black_select_results, aes(y=point, x=treatments))
p = p + scale_y_continuous(name="Share of black\nrespondents in sample", limits=c(0, .15))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)
p = p + scale_x_discrete(name=paste("Treatment conditions\nF-test p-value = ", round(black_select_fstat,4), sep=""))
p = p + ggtitle("Effect of researcher race and gender on\nselection into survey by race")
p = p + theme_bw1()
print(p)
dev.off()

##################
# Make Figure A4 #
##################

# Panel 1: Democrats

dem_select <- lm(democrat ~ treatment.whitewoman + treatment.blackwoman + treatment.blackman + treatment.whiteman, data=researcher.data)
dem_select_vcov <- vcovHC(dem_select)
coeftest(dem_select, dem_select_vcov)


prediction.frame.exploratory <- data.frame(treatment.whitewoman = c(1,0,0,0), treatment.blackwoman = c(0,1,0,0), treatment.blackman= c(0,0,1,0), treatment.whiteman = c(0,0,0,1))

dem_select_results <- data.frame(treatments = c(paste("White Woman\nn=",sum(model.frame(dem_select)$treatment.whitewoman==1),sep=""), 
                                                paste("Black Woman\nn=",sum(model.frame(dem_select)$treatment.blackwoman == 1),sep=""),
                                                paste("Black Man\nn=",sum(model.frame(dem_select)$treatment.blackman == 1),sep=""),
                                                paste("White Man\nn=",sum(model.frame(dem_select)$treatment.whiteman == 1),sep="")),
                                 point = predict(dem_select, newdata=prediction.frame.exploratory),
                                 se = sqrt(c(dem_select_vcov[1,1] + dem_select_vcov[2,2] + 2*dem_select_vcov[1,2],
                                             dem_select_vcov[1,1] + dem_select_vcov[3,3] + 2*dem_select_vcov[1,3],
                                             dem_select_vcov[1,1] + dem_select_vcov[4,4] + 2*dem_select_vcov[1,4],
                                             dem_select_vcov[1,1])))


dem_select_results$lower95 <- dem_select_results$point - qnorm(.975)*dem_select_results$se
dem_select_results$upper95 <- dem_select_results$point + qnorm(.975)*dem_select_results$se

dem_select_fstat <- 1- pf(summary(dem_select)[[10]][1], summary(dem_select)[[10]][2], summary(dem_select)[[10]][3])

#### Plot % dem in-sample
pdf("figures/FigureA4_Panel1.pdf", width=7, height=4)
p = ggplot(dem_select_results, aes(y=point, x=treatments))

p = p + scale_y_continuous(name="Share of Democrat\nrespondents in sample", limits=c(.3, .6))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)

p = p + scale_x_discrete(name=paste("Treatment conditions\nF-test p-value = ", round(dem_select_fstat,4), sep=""))
p = p + ggtitle("Effect of researcher race and gender on\nselection into survey by party ID")
p = p + theme_bw1()
print(p)
dev.off()

# Panel 2: Republicans

rep_select <- lm(republican ~ treatment.whitewoman + treatment.blackwoman + treatment.blackman + treatment.whiteman, data=researcher.data)
rep_select_vcov <- vcovHC(rep_select)
coeftest(rep_select, rep_select_vcov)


prediction.frame.exploratory <- data.frame(treatment.whitewoman = c(1,0,0,0), treatment.blackwoman = c(0,1,0,0), treatment.blackman= c(0,0,1,0), treatment.whiteman = c(0,0,0,1))

rep_select_results <- data.frame(treatments = c(paste("White Woman\nn=",sum(model.frame(rep_select)$treatment.whitewoman==1),sep=""), 
                                                paste("Black Woman\nn=",sum(model.frame(rep_select)$treatment.blackwoman == 1),sep=""),
                                                paste("Black Man\nn=",sum(model.frame(rep_select)$treatment.blackman == 1),sep=""),
                                                paste("White Man\nn=",sum(model.frame(rep_select)$treatment.whiteman == 1),sep="")),
                                 point = predict(rep_select, newdata=prediction.frame.exploratory),
                                 se = sqrt(c(rep_select_vcov[1,1] + rep_select_vcov[2,2] + 2*rep_select_vcov[1,2],
                                             rep_select_vcov[1,1] + rep_select_vcov[3,3] + 2*rep_select_vcov[1,3],
                                             rep_select_vcov[1,1] + rep_select_vcov[4,4] + 2*rep_select_vcov[1,4],
                                             rep_select_vcov[1,1])))


rep_select_results$lower95 <- rep_select_results$point - qnorm(.975)*rep_select_results$se
rep_select_results$upper95 <- rep_select_results$point + qnorm(.975)*rep_select_results$se

rep_select_fstat <- 1- pf(summary(rep_select)[[10]][1], summary(rep_select)[[10]][2], summary(rep_select)[[10]][3])

#### Plot % rep in-sample
pdf("figures/FigureA4_Panel2.pdf", width=7, height=4)
p = ggplot(rep_select_results, aes(y=point, x=treatments))

p = p + scale_y_continuous(name="Share of Republican\nrespondents in sample", limits=c(0.1, .3))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)

p = p + scale_x_discrete(name=paste("Treatment conditions\nF-test p-value = ", round(rep_select_fstat,4), sep=""))
p = p + ggtitle("Effect of researcher race and gender on\nselection into survey by party ID")
p = p + theme_bw1()
print(p)
dev.off()


##########################################
## Look at Attention Checks (SI section E)
##########################################

ac_pass <- lm(ac.pass.both ~ treatment.whiteman, data=researcher.data)

ac_pass_vcov <- vcovHC(ac_pass)
coeftest(ac_pass, ac_pass_vcov)

ac_pass_pval <- round(coeftest(ac_pass, ac_pass_vcov)[2,4], 3)
prediction.frame.ac <- data.frame(treatment.whiteman = c(1,0))

ac_pass_results <- data.frame(treatments = c(paste("White, Male Name\nn=",sum(model.frame(ac_pass)$treatment.whiteman),sep=""), 
                                             paste("Other Name\nn=",sum(model.frame(ac_pass)$treatment.whiteman == 0),sep="")),
                              point = predict(ac_pass, newdata=prediction.frame.ac),
                              se = sqrt(c(ac_pass_vcov[1,1] + ac_pass_vcov[2,2] + 2*ac_pass_vcov[1,2], ac_pass_vcov[1,1])))

ac_pass_results$lower95 <- ac_pass_results$point - qnorm(.975)*ac_pass_results$se
ac_pass_results$upper95 <- ac_pass_results$point + qnorm(.975)*ac_pass_results$se


#### Plot attention check completion (Figure A5 in SI)
pdf("figures/FigureA5.pdf", width=5, height=7)
p = ggplot(ac_pass_results, aes(y=point, x=treatments))

p = p + scale_y_continuous(name="% that successfully completed both attention checks", limits=c(.7, 1))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)

p = p + scale_x_discrete(name=paste("Treatment conditions\np-value of difference = ",ac_pass_pval, sep=""))
p = p + ggtitle("Effect of researcher gender\n and race on attention\n check completion")
p = p + theme_bw1()
print(p)
dev.off()


##############################################################
#### Power calculations - using women equality as a hypothetical
#### (Reported in Section F of the SI)
##############################################################

#### Power calculations with multiple testing
### Parameters
set.seed(02138)
M <- 3
effect_size <- seq(from=0, to=.1, by=.0025)
sample_size <- 1000

iter <- 50000
alpha <- .05

### Variances
variances <- c(var(researcher.data.womenequal$womenrole.equal),
               var(researcher.data.womanpres$president.woman.yes),
               var(researcher.data.servicesmore$services.more) )*1.5 ### The third one is greater than .25, which is impossible
### Strict max at .25
variances[3] <- .25

##########################
### Store power calc result
power_results <- rep(0, length(effect_size))
names(power_results) <- effect_size

##### All effects true
### For each effect_size
for (size in effect_size){
  reject <- rep(0, iter)
  #### Draw test statistics
  for (i in 1:iter){
    ts_variance <- variances/sample_size + variances/sample_size
    effects <- rep(size, M)
    test_stats <- rnorm(3, effects/sqrt(ts_variance), 1)
    
    ## Sample true test statistic
    all_pvals <- pnorm(-abs(test_stats), 0, 1)*2
    
    ## Order pvalues
    ordered_stats <- sort(all_pvals)
    
    ## Which ordered statistic is the ``true" one 
    
    ## Do we reject?
    thresholds <- ((1:M)/M)*alpha
    
    ## If nothing is significant
    if (sum(ordered_stats < thresholds) == 0){
      reject[i] <- 0
    }else{
      
      max_thresh <- max((1:M)[ordered_stats < thresholds])
      reject[i] <- max_thresh/M
      
    }
    
  }
  power_results[as.character(size)] <- mean(reject)
}

power_results_all <- power_results

### Variances
variances <- c(var(researcher.data.womenequal$womenrole.equal),
               var(researcher.data.womanpres$president.woman.yes),
               var(researcher.data.servicesmore$services.more) )*1.5

### Strict max at .25
variances[3] <- .25

############## One effect is true, others are 0

set.seed(02138)
##########################
### Store power calc result
power_results <- rep(0, length(effect_size))
names(power_results) <- effect_size

##### One effect is true
### For each effect_size
for (size in effect_size){
  reject <- rep(0, iter)
  #### Draw test statistics
  for (i in 1:iter){
    ts_variance <- variances/sample_size + variances/sample_size
    effects <- c(0, 0, size)
    test_stats <- rnorm(3, effects/sqrt(ts_variance), 1)
    
    
    ## Sample true test statistic
    all_pvals <- pnorm(-abs(test_stats), 0, 1)*2
    key_pval = all_pvals[3]
    
    ## Order pvalues
    ordered_stats <- sort(all_pvals)
    
    ## Which ordered statistic is the ``true" one 
    key_element = (1:M)[which(ordered_stats == key_pval)]
    
    ## Do we reject?
    thresholds <- ((1:M)/M)*alpha
    
    ## If nothing is significant
    if (sum(ordered_stats < thresholds) == 0){
      reject[i] <- 0
    }else{
      
      max_thresh <- max((1:M)[ordered_stats < thresholds])
      if (key_element <= max_thresh){
        reject[i] <- 1
      }else{
        reject[i] <- 0
      }
    }
    
  }
  power_results[as.character(size)] <- mean(reject)
}

power_results_1 <- power_results

###############################################

### Plot curves: this is Figure A6 in SI

pdf("figures/FigureA6.pdf", width=7, height=5)
plot(y=power_results_all, x=seq(from=0, to=.1, by=.0025), type="l", lwd=3, 
     ylab = expression(paste("Power (", alpha, "= .05), two-sided, 3 tests, BH corrected")), xlab = "Absolute True Effect Size (difference in proportions)")
lines(y=power_results_1, x=seq(from=0, to=.1, by=.0025), type="l", lwd=3, lty=2)
abline(h=0.8, lty=3, lwd=2, col="darkgrey")
abline(h=0.5, lty=3, lwd=2, col="darkgrey")
abline(h=1, lty=3, lwd=2, col="darkgrey")

legend("bottomright", lty=c(1,2), legend=c("All Nulls False", "One Null False"), lwd=3)
dev.off()

################################################################
#### Exploratory components (As in Section G of SI)
################################################################

################
### Finer-grained treatment
#################

researcher.data$womenrole[researcher.data$womenrole == -99] <- NA
researcher.data$womenrole.equal <- ifelse(researcher.data$womenrole == 1, 1, 0)
researcher.data$womenrole.home <- ifelse(researcher.data$womenrole == 2, 1, 0)
researcher.data$womenrole.dk <- ifelse(researcher.data$womenrole == 3, 1, 0)

exploratory_women_equal <- lm(womenrole.equal ~ treatment.whitewoman + treatment.blackwoman + treatment.blackman + treatment.whiteman, data=researcher.data)
exploratory_women_equal_vcov <- vcovHC(exploratory_women_equal)
coeftest(exploratory_women_equal, exploratory_women_equal_vcov)

exploratory_women_equal_pval <- round(coeftest(exploratory_women_equal, exploratory_women_equal_vcov)[2,4], 3)


prediction.frame.exploratory <- data.frame(treatment.whitewoman = c(1,0,0,0), treatment.blackwoman = c(0,1,0,0), treatment.blackman= c(0,0,1,0), treatment.whiteman = c(0,0,0,1))

exploratory_women_equal_results <- data.frame(treatments = c(paste("White Woman\nn=",sum(model.frame(exploratory_women_equal)$treatment.whitewoman==1),sep=""), 
                                            paste("Black Woman\nn=",sum(model.frame(exploratory_women_equal)$treatment.blackwoman == 1),sep=""),
                                            paste("Black Man\nn=",sum(model.frame(exploratory_women_equal)$treatment.blackman == 1),sep=""),
                                            paste("White Man\nn=",sum(model.frame(exploratory_women_equal)$treatment.whiteman == 1),sep="")),
                             point = predict(exploratory_women_equal, newdata=prediction.frame.exploratory),
                             se = sqrt(c(exploratory_women_equal_vcov[1,1] + exploratory_women_equal_vcov[2,2] + 2*exploratory_women_equal_vcov[1,2],
                                         exploratory_women_equal_vcov[1,1] + exploratory_women_equal_vcov[3,3] + 2*exploratory_women_equal_vcov[1,3],
                                         exploratory_women_equal_vcov[1,1] + exploratory_women_equal_vcov[4,4] + 2*exploratory_women_equal_vcov[1,4],
                                         exploratory_women_equal_vcov[1,1])))


exploratory_women_equal_results$lower95 <- exploratory_women_equal_results$point - qnorm(.975)*exploratory_women_equal_results$se
exploratory_women_equal_results$upper95 <- exploratory_women_equal_results$point + qnorm(.975)*exploratory_women_equal_results$se

#### Plot exploratory - white woman vs. white man on equality
pdf("figures/FigureA7.pdf", width=7, height=4)
p = ggplot(exploratory_women_equal_results, aes(y=point, x=treatments))
p = p + scale_y_continuous(name="% that say women should\nbe equal to men", limits=c(.84, .96))
p = p + geom_pointrange(aes(ymin=lower95,ymax=upper95), size= 1.5)
p = p + scale_x_discrete(name=paste("Treatment conditions\np-value of white woman vs. white man difference = ",exploratory_women_equal_pval, sep=""))
p = p + ggtitle("Effect of researcher race and gender on\nattitudes on gender equality")
p = p + theme_bw1()
print(p)
dev.off()
  





